Planning considerations:
- Parking accommodations
- Hotel reservations/pricing packages
- Race-day support such as police, volunteers, and space to physically queue up the racers at the start
- SWAG and timing-chip orders
- SWAG and bib/chip distribution
- Logistics for transitioning to a virtual race, mainly around shipping everything (do we want to consider this piece?)
#Libraries
library(tidyverse)
## -- Attaching packages ----------------------------------------------------------------------------------------- tidyverse 1.3.0 --
## v ggplot2 3.3.2 v purrr 0.3.4
## v tibble 3.0.3 v dplyr 1.0.2
## v tidyr 1.1.2 v stringr 1.4.0
## v readr 1.3.1 v forcats 0.5.0
## -- Conflicts -------------------------------------------------------------------------------------------- tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
library(ggplot2)
library(plotly)
##
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
##
## last_plot
## The following object is masked from 'package:stats':
##
## filter
## The following object is masked from 'package:graphics':
##
## layout
library(RColorBrewer)
library(chron)
library(lubridate)
##
## Attaching package: 'lubridate'
## The following objects are masked from 'package:chron':
##
## days, hours, minutes, seconds, years
## The following objects are masked from 'package:base':
##
## date, intersect, setdiff, union
library(hrbrthemes)
## NOTE: Either Arial Narrow or Roboto Condensed fonts are required to use these themes.
## Please use hrbrthemes::import_roboto_condensed() to install Roboto Condensed and
## if Arial Narrow is not on your system, please see https://bit.ly/arialnarrow
library(viridis)
## Loading required package: viridisLite
# Read the serialized results table from disk.
# NOTE(review): absolute, machine-specific path -- adjust when running elsewhere.
CBWtableRaw <- readRDS(file = "C:/Users/sabri/Downloads/CBWtableRaw.rds")
# Print the first rows as a quick look at the columns.
head(x = CBWtableRaw)
## Race Name Age Time Pace PiS/TiS Division PiD/TiD
## 1 1999 10M Jane Omoro (W) 26 0:53:37 5:22 1/2358 W2529 1/559
## 2 1999 10M Jane Ngotho (W) 29 0:53:38 5:22 2/2358 W2529 2/559
## 3 1999 10M Lidiya Grigoryeva (W) NR 0:53:40 5:22 3/2358 NR NR
## 4 1999 10M Eunice Sagero (W) 20 0:53:55 5:24 4/2358 W2024 1/196
## 5 1999 10M Alla Zhilyayeva (W) 29 0:54:08 5:25 5/2358 W2529 3/559
## 6 1999 10M Teresa Wanjiku (W) 24 0:54:10 5:25 6/2358 W2024 2/196
## Hometown year divisionTitle section page
## 1 Kenya 1999 Overall+Women 10M 1
## 2 Kenya 1999 Overall+Women 10M 1
## 3 Russia 1999 Overall+Women 10M 1
## 4 Kenya 1999 Overall+Women 10M 1
## 5 Russia 1999 Overall+Women 10M 1
## 6 Kenya 1999 Overall+Women 10M 1
## source
## 1 http://www.cballtimeresults.org/performances?division=Overall+Women&page=1&section=10M&sex=W&utf8=%E2%9C%93&year=1999
## 2 http://www.cballtimeresults.org/performances?division=Overall+Women&page=1&section=10M&sex=W&utf8=%E2%9C%93&year=1999
## 3 http://www.cballtimeresults.org/performances?division=Overall+Women&page=1&section=10M&sex=W&utf8=%E2%9C%93&year=1999
## 4 http://www.cballtimeresults.org/performances?division=Overall+Women&page=1&section=10M&sex=W&utf8=%E2%9C%93&year=1999
## 5 http://www.cballtimeresults.org/performances?division=Overall+Women&page=1&section=10M&sex=W&utf8=%E2%9C%93&year=1999
## 6 http://www.cballtimeresults.org/performances?division=Overall+Women&page=1&section=10M&sex=W&utf8=%E2%9C%93&year=1999
## sex
## 1 W
## 2 W
## 3 W
## 4 W
## 5 W
## 6 W
# Compact overview of the table: column names, types, and leading values.
str(object = CBWtableRaw)
## 'data.frame': 75866 obs. of 15 variables:
## $ Race : chr "1999 10M" "1999 10M" "1999 10M" "1999 10M" ...
## $ Name : chr "Jane Omoro (W)" "Jane Ngotho (W)" "Lidiya Grigoryeva (W)" "Eunice Sagero (W)" ...
## $ Age : chr "26" "29" "NR" "20" ...
## $ Time : chr "0:53:37" "0:53:38" "0:53:40" "0:53:55" ...
## $ Pace : chr "5:22" "5:22" "5:22" "5:24" ...
## $ PiS/TiS : chr "1/2358" "2/2358" "3/2358" "4/2358" ...
## $ Division : chr "W2529" "W2529" "NR" "W2024" ...
## $ PiD/TiD : chr "1/559" "2/559" "NR" "1/196" ...
## $ Hometown : chr "Kenya" "Kenya" "Russia" "Kenya" ...
## $ year : int 1999 1999 1999 1999 1999 1999 1999 1999 1999 1999 ...
## $ divisionTitle: chr "Overall+Women" "Overall+Women" "Overall+Women" "Overall+Women" ...
## $ section : chr "10M" "10M" "10M" "10M" ...
## $ page : int 1 1 1 1 1 1 1 1 1 1 ...
## $ source : chr "http://www.cballtimeresults.org/performances?division=Overall+Women&page=1&section=10M&sex=W&utf8=%E2%9C%93&year=1999" "http://www.cballtimeresults.org/performances?division=Overall+Women&page=1&section=10M&sex=W&utf8=%E2%9C%93&year=1999" "http://www.cballtimeresults.org/performances?division=Overall+Women&page=1&section=10M&sex=W&utf8=%E2%9C%93&year=1999" "http://www.cballtimeresults.org/performances?division=Overall+Women&page=1&section=10M&sex=W&utf8=%E2%9C%93&year=1999" ...
## $ sex : chr "W" "W" "W" "W" ...
Note: the output above shows missing values coded as the string "NR" rather than NA, NaN, or a blank, so "NR" will need to be factored in explicitly.
# Collect the rows whose Division is the literal placeholder "NR".
noresults <- dplyr::filter(CBWtableRaw, Division == "NR")
# Rows x columns of that subset.
dim(noresults)
## [1] 19 15
# Preview the first few "NR" rows.
head(noresults)
## Race Name Age Time Pace PiS/TiS Division PiD/TiD
## 1 1999 10M Lidiya Grigoryeva (W) NR 0:53:40 5:22 3/2358 NR NR
## 2 1999 10M Gladys Asiba (W) NR 0:54:50 5:29 8/2358 NR NR
## 3 1999 10M Connie Buckwalter (W) NR 0:59:36 5:58 17/2358 NR NR
## 4 1999 10M Ann Reid (W) NR 1:53:03 11:18 2176/2358 NR NR
## 5 2001 10M Loretta Cuce (W) NR 1:53:38 11:22 2611/2972 NR NR
## 6 2002 10M Unidentified Runner (W) NR 1:19:45 7:59 270/3333 NR NR
## Hometown year divisionTitle section page
## 1 Russia 1999 Overall+Women 10M 1
## 2 Kenya 1999 Overall+Women 10M 1
## 3 Lancaster, PA 1999 Overall+Women 10M 1
## 4 Bethesda, MD 1999 Overall+Women 10M 109
## 5 Alexandria, VA 2001 Overall+Women 10M 131
## 6 Washington, DC 2002 Overall+Women 10M 14
## source
## 1 http://www.cballtimeresults.org/performances?division=Overall+Women&page=1&section=10M&sex=W&utf8=%E2%9C%93&year=1999
## 2 http://www.cballtimeresults.org/performances?division=Overall+Women&page=1&section=10M&sex=W&utf8=%E2%9C%93&year=1999
## 3 http://www.cballtimeresults.org/performances?division=Overall+Women&page=1&section=10M&sex=W&utf8=%E2%9C%93&year=1999
## 4 http://www.cballtimeresults.org/performances?division=Overall+Women&page=109&section=10M&sex=W&utf8=%E2%9C%93&year=1999
## 5 http://www.cballtimeresults.org/performances?division=Overall+Women&page=131&section=10M&sex=W&utf8=%E2%9C%93&year=2001
## 6 http://www.cballtimeresults.org/performances?division=Overall+Women&page=14&section=10M&sex=W&utf8=%E2%9C%93&year=2002
## sex
## 1 W
## 2 W
## 3 W
## 4 W
## 5 W
## 6 W
# Remove no results: keep only rows whose Division is something other than "NR".
# NOTE(review): the summaries and plots further down still read CBWtableRaw,
# not `data` -- confirm which table is intended.
data <- dplyr::filter(CBWtableRaw, Division != "NR")
convert race year to factor
# Looking at race year: participants by year
# One row per year holding the number of result rows recorded for it.
plotdata <- summarise(group_by(CBWtableRaw, year), count = n())
## `summarise()` ungrouping output (override with `.groups` argument)
# Line chart of the yearly counts with a marker on every year.
p <- ggplot(plotdata, aes(x = year, y = count)) +
  geom_line() +
  geom_point()
# Hand the ggplot object to plotly for an interactive rendering.
ggplotly(p)
# Same yearly counts as above, drawn as solid blue bars.
p <- ggplot(data = plotdata, mapping = aes(x = year, y = count)) +
  geom_bar(stat = "identity", fill = "blue")
ggplotly(p)
# Looking at counts split by Division as well as by year
plotdata <- summarise(group_by(CBWtableRaw, year, Division), count = n())
## `summarise()` regrouping output by 'year' (override with `.groups` argument)
# Stacked bars: one bar per year, segments filled by Division.
p <- ggplot(plotdata, aes(x = year, y = count, fill = Division)) +
  geom_bar(stat = "identity", position = "stack")
ggplotly(p)
# Share of each Division within every race year, as a stacked percentage chart.

# Total number of result rows per year (the denominator of the percentage).
plotdata_by_year <- CBWtableRaw %>%
  group_by(year) %>%
  summarise(count_year = n())
## `summarise()` ungrouping output (override with `.groups` argument)
# One fill colour per distinct Division value, interpolated from 9 colours of
# the RColorBrewer "Paired" palette.
colourCount <- length(unique(CBWtableRaw$Division))
getPalette <- colorRampPalette(brewer.pal(9, "Paired"))
# Number of result rows per (year, Division) pair (the numerator).
plotdata_by_year_div <- CBWtableRaw %>%
  group_by(year, Division) %>%
  summarise(count_year_div = n())
## `summarise()` regrouping output by 'year' (override with `.groups` argument)
# Attach the yearly total to every (year, Division) count and express the
# division count as a percentage of its year, rounded to one decimal.
# dplyr::full_join() is used instead of plyr::join(type = "full"): plyr is
# retired and is never attached by this script, while dplyr already is.
plotdata <- plotdata_by_year %>%
  dplyr::full_join(plotdata_by_year_div, by = "year") %>%
  mutate(percent = round(count_year_div / count_year * 100, 1))
# Stacked bars of the percentages, legend placed below the plot in two rows.
p <- plotdata %>%
  ggplot(aes(x = year, y = percent, fill = Division)) +
  geom_bar(stat = "identity", position = "stack") +
  scale_fill_manual(values = getPalette(colourCount)) +
  theme(legend.position = "bottom") +
  guides(fill = guide_legend(nrow = 2))
ggplotly(p)
# Heatmap on the distribution -- are they getting any younger?
# Each tile is one (year, Division) cell; its fill encodes `percent`.
p <- ggplot(plotdata, aes(x = year, y = Division, fill = percent)) +
  geom_tile(alpha = 0.5) +
  scale_fill_viridis()
ggplotly(p)
convert time to minutes
# Check whether any finish time uses the "NR" placeholder.
noresults <- dplyr::filter(CBWtableRaw, Time == "NR")
dim(noresults)
## [1] 0 15
head(noresults)
## [1] Race Name Age Time Pace
## [6] PiS/TiS Division PiD/TiD Hometown year
## [11] divisionTitle section page source sex
## <0 rows> (or 0-length row.names)
# Inspect the last rows of the full table.
tail(x = CBWtableRaw)
## Race Name Age Time Pace PiS/TiS Division
## 75861 2012 10M Effie Harary (W) 39 2:32:08 15:13 9724/9729 W3539
## 75862 2012 10M Khristina Nava (W) 40 2:33:11 15:19 9725/9729 W4044
## 75863 2012 10M Geneva Dixon (W) 31 2:36:03 15:36 9726/9729 W3034
## 75864 2012 10M Veronica Eligan (W) 55 2:36:45 15:41 9727/9729 W5559
## 75865 2012 10M Denise Bobba (W) 40 2:36:54 15:41 9728/9729 W4044
## 75866 2012 10M Rashonna Waples (W) 38 2:50:58 17:06 9729/9729 W3539
## PiD/TiD Hometown year divisionTitle section page
## 75861 1365/1366 Long Branch, NJ 2012 Overall+Women 10M 487
## 75862 973/974 Fort Meade, MD 2012 Overall+Women 10M 487
## 75863 2228/2228 Manassas Park, VA 2012 Overall+Women 10M 487
## 75864 236/236 Mitchellville, MD 2012 Overall+Women 10M 487
## 75865 974/974 Herndon, VA 2012 Overall+Women 10M 487
## 75866 1366/1366 District Heights, MD 2012 Overall+Women 10M 487
## source
## 75861 http://www.cballtimeresults.org/performances?division=Overall+Women&page=487&section=10M&sex=W&utf8=%E2%9C%93&year=2012
## 75862 http://www.cballtimeresults.org/performances?division=Overall+Women&page=487&section=10M&sex=W&utf8=%E2%9C%93&year=2012
## 75863 http://www.cballtimeresults.org/performances?division=Overall+Women&page=487&section=10M&sex=W&utf8=%E2%9C%93&year=2012
## 75864 http://www.cballtimeresults.org/performances?division=Overall+Women&page=487&section=10M&sex=W&utf8=%E2%9C%93&year=2012
## 75865 http://www.cballtimeresults.org/performances?division=Overall+Women&page=487&section=10M&sex=W&utf8=%E2%9C%93&year=2012
## 75866 http://www.cballtimeresults.org/performances?division=Overall+Women&page=487&section=10M&sex=W&utf8=%E2%9C%93&year=2012
## sex
## 75861 W
## 75862 W
## 75863 W
## 75864 W
## 75865 W
## 75866 W
# Convert the character finish times (H:MM:SS) into chron time values.
CBWtableRaw$Time <- chron(times. = CBWtableRaw$Time)
# Count how many entries are NA after the conversion.
sum(is.na(CBWtableRaw$Time))
## [1] 0
# Split each time into hour, minute, and second components.
res <- hms(CBWtableRaw$Time)
# Total minutes = hours expressed in minutes + minutes + seconds expressed in
# minutes. This overwrites Time, so the column is numeric minutes from here on.
CBWtableRaw$Time <- 60 * hour(res) + minute(res) + second(res) / 60
# Participant counts by year, drawn as one panel per Division.
plotdata <- summarise(group_by(CBWtableRaw, year, Division), count = n())
## `summarise()` regrouping output by 'year' (override with `.groups` argument)
# One fill colour per distinct Division, interpolated from the "Paired" palette.
colourCount <- length(unique(CBWtableRaw$Division))
getPalette <- colorRampPalette(brewer.pal(9, "Paired"))
p <- ggplot(
  plotdata,
  aes(x = year, y = count, group = year, fill = Division)
) +
  geom_bar(stat = "identity") +
  facet_wrap(~Division) +
  scale_fill_manual(values = getPalette(colourCount)) +
  theme(legend.position = "bottom") +
  guides(fill = guide_legend(nrow = 2))
# NOTE(review): no `text` aesthetic is mapped above -- confirm what the
# tooltip is expected to show.
ggplotly(p, tooltip = "text")
## Warning: `group_by_()` is deprecated as of dplyr 0.7.0.
## Please use `group_by()` instead.
## See vignette('programming') for more help
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_warnings()` to see where this warning was generated.
# Same faceted count chart, but every Division panel gets its own axis scales.
p <- ggplot(
  plotdata,
  aes(x = year, y = count, group = year, fill = Division)
) +
  geom_bar(stat = "identity") +
  facet_wrap(~Division, scales = "free") +
  scale_fill_manual(values = getPalette(colourCount)) +
  theme(legend.position = "bottom") +
  guides(fill = guide_legend(nrow = 2))
ggplotly(p, tooltip = "text")
Boxplots of finish time by year, to compare the medians
# One box per race year; Time is in minutes after the conversion above.
p <- ggplot(
  CBWtableRaw,
  aes(x = year, y = Time, group = year, color = year)
) +
  geom_boxplot()
ggplotly(p, tooltip = "text")
Looking at the median finish time by division
# Yearly finish-time boxplots again, split into one panel per Division.
p <- ggplot(
  CBWtableRaw,
  aes(x = year, y = Time, group = year, color = year)
) +
  geom_boxplot() +
  facet_wrap(~Division)
ggplotly(p, tooltip = "text")
# Predict LM
# Predict LOESS on time
# Scatter of Time against year, with the y-axis restricted to 94-100.
plot(Time ~ year, data = CBWtableRaw, ylim = c(94, 100), main = "Year V Time")
# Local-regression (LOESS) fit of Time on year.
out <- loess(formula = Time ~ year, data = CBWtableRaw)
# Overlay the fitted values: curve() supplies `x`, which is passed on as `year`.
curve(predict(out, newdata = data.frame(year = x)), add = TRUE)
Next step: run a piecewise fit.
Evaluate the city/state impact to inform the planning recommendations.
#split out city and state to see in town/out of town distribution
library(dplyr)
library(tidyr)
#before <- CBWtableRaw
#unlist(strsplit(before$Hometown, "[, ]"))##Not working
#head(before, 10)